# load packages, installing if missing
if (!require(librarian)){
  install.packages("librarian")
  library(librarian)
}
## Loading required package: librarian
librarian::shelf(
  dismo, dplyr, DT, ggplot2, here, htmltools, leaflet, mapview, purrr, raster, readr, rgbif, rgdal, rJava, sdmpredictors, sf, spocc, tidyr, tidyverse)
## 
##   The 'cran_repo' argument in shelf() was not set, so it will use
##   cran_repo = 'https://cran.r-project.org' by default.
## 
##   To avoid this message, set the 'cran_repo' argument to a CRAN
##   mirror URL (see https://cran.r-project.org/mirrors.html) or set
##   'quiet = TRUE'.
select <- dplyr::select # overwrite raster::select

# set random seed for reproducibility
set.seed(42)

# directory to store data
dir_data <- here("data/sdm")
dir.create(dir_data, showWarnings = F)
obs_csv <- file.path(dir_data, "obs.csv")
obs_geo <- file.path(dir_data, "obs.geojson")

# get species occurrence data from GBIF with coordinates
(res <- spocc::occ(
# query = 'Bradypus variegatus', # sloth
# query = 'Phengaris alcon', # Alcon blue
# query = 'Camponotus pennsylvanicus', # ant
  query = 'Latrodectus mactans', # black widow
  from = 'gbif', has_coords = T,
  limit = 10,000))
## Searched: gbif
## Occurrences - Found: 3,539, Returned: 10
## Search type: Scientific
##   gbif: Latrodectus mactans (10)
# extract data frame from result
df <- res$gbif$data[[1]] 
nrow(df) # number of rows
## [1] 10
# convert to points of observation from lon/lat columns in data frame
obs <- df %>% 
  sf::st_as_sf(
    coords = c("longitude", "latitude"),
    crs = st_crs(4326))

readr::write_csv(df, obs_csv)
sf::write_sf(obs, obs_geo)

# show points on map
# mapview::mapview(obs, map.types = "Stamen.Terrain")
mapview::mapview(obs, map.types = "Stamen.Watercolor")

Question 1.

How many observations total are in GBIF for your species? (Hint: ?occ)

Found: 3,539 ???

Question 2.

Do you see any odd observations, like marine species on land or vice versa? If so, please see the Data Cleaning and explain what you did to fix or remove these points.

Get Environmental Data

Presence

dir_env <- file.path(dir_data, "env")

# set a default data directory
options(sdmpredictors_datadir = dir_env)

# choosing terrestrial
env_datasets <- sdmpredictors::list_datasets(terrestrial = TRUE, marine = FALSE)

# show table of datasets
env_datasets %>% 
  select(dataset_code, description, citation) %>% 
  DT::datatable()
# choose datasets for a vector
env_datasets_vec <- c("WorldClim", "ENVIREM")

# get layers
env_layers <- sdmpredictors::list_layers(env_datasets_vec)
DT::datatable(env_layers)
# choose layers after some inspection and perhaps consulting literature
env_layers_vec <- c("WC_alt", "WC_bio1", "WC_bio2", "ER_tri", "ER_topoWet")

# get layers
env_stack <- load_layers(env_layers_vec)

# interactive plot layers, hiding all but first (select others)
mapview(env_stack, hide = T)
obs_hull_geo <- file.path(dir_data, "obs_hull.geojson")

# make convex hull around points of observation
obs_hull <- sf::st_convex_hull(st_union(obs))

# show points on map
mapview(
  list(obs, obs_hull))
# save obs hull
write_sf(obs_hull, obs_hull_geo)

obs_hull_sp <- sf::as_Spatial(obs_hull)

env_stack <- raster::mask(env_stack, obs_hull_sp) %>% 
  raster::crop(extent(obs_hull_sp))

mapview(obs) + 
  mapview(env_stack, hide = T)

Pseudo-Absence

absence_geo <- file.path(dir_data, "absence.geojson")
pts_geo     <- file.path(dir_data, "pts.geojson")
pts_env_csv <- file.path(dir_data, "pts_env.csv")

# get raster count of observations
r_obs <- rasterize(
  sf::as_Spatial(obs), env_stack[[1]], field=1, fun='count')

mapview(obs) + 
  mapview(r_obs)
# create mask for 
r_mask <- mask(env_stack[[1]] > -Inf, r_obs, inverse=T)

absence <- dismo::randomPoints(r_mask, nrow(obs)) %>% 
  as_tibble() %>% 
  st_as_sf(coords = c("x", "y"), crs = 4326)

mapview(obs, col.regions = "green") + 
  mapview(absence, col.regions = "gray")
# combine presence and absence into single set of labeled points 
pts <- rbind(
  obs %>% 
    mutate(
      present = 1) %>% 
    select(present),
  absence %>% 
    mutate(
      present = 0)) %>% 
  mutate(
    ID = 1:n()) %>% 
  relocate(ID)
write_sf(pts, pts_geo)

# extract raster values for points
pts_env <- raster::extract(env_stack, as_Spatial(pts), df=TRUE) %>% 
  tibble() %>% 
  # join present and geometry columns to raster value results for points
  left_join(
    pts %>% 
      select(ID, present),
    by = "ID") %>% 
  relocate(present, .after = ID) %>% 
  # extract lon, lat as single columns
  mutate(
    #present = factor(present),
    lon = st_coordinates(geometry)[,1],
    lat = st_coordinates(geometry)[,2]) %>% 
  select(-geometry)

write_csv(pts_env, pts_env_csv)
pts_env %>% 
  select(-ID) %>% 
  DT::datatable()

Term Plots

pts_env %>% 
  select(-ID) %>% 
  mutate(
    present = factor(present)) %>% 
  pivot_longer(-present) %>% 
  ggplot() +
  geom_density(aes(x = value, fill = present)) + 
  scale_fill_manual(values = alpha(c("gray", "green"), 0.5)) +
  scale_x_continuous(expand=c(0,0)) +
  scale_y_continuous(expand=c(0,0)) +
  theme_bw() + 
  facet_wrap(~name, scales = "free") +
  theme(
    legend.position = c(1, 0),
    legend.justification = c(1, 0))